تمرین سری سوم: از لالیگا تا لیگ برتر
با استفاده از داده های لیگ دسته اول اسپانیا به سوالات زیر پاسخ دهید. از هر دو ابزار ggplot2 و highcharter برای این کار تصویرسازی استفاده نمایید.
پیش از شروع به حل تمرین تلاش می کنیم تا داده را کمی مناسب تر کنیم تا به صورتی قابل استفاده تر در بیاید. در اینجا داده ای می سازیم که شبیه به چارت هر لیگ است. همچنین این نکته را هم که امتیازات برد ها پیش از سال ۹۵، ۲ امتیاز بوده به جای ۳ امتیاز را محاسبه می کنیم ولی در بعضی جاها از همان ۳ امتیازه استفاده می کنیم و در بعضی جاها از ۲ امتیازه(در سوال اول بخش اول از ۲ امتیازه استفاده شده است).
library(engsoccerdata)
library(dplyr)
library(ggplot2)
library(highcharter)
library(kableExtra)
library(knitr)
library(tidyr)
la <- spain %>%
filter(tier == 1,
round == "league")
lah <- la %>%
mutate(
team = home,
opp = visitor,
GF = hgoal,
GA = vgoal,
GD = hgoal - vgoal,
win = hgoal > vgoal,
lose = hgoal < vgoal,
draw = hgoal == vgoal
) %>%
mutate(score = (Season >= 1995) * win * 3 + (Season <= 1994)*win * 2 + draw) %>%
mutate(score3 = win * 3 + draw) %>%
select(-home, -visitor, -hgoal, -vgoal, -HT, -FT)
lav <- la %>%
mutate(
team = visitor,
opp = home,
GF = vgoal,
GA = hgoal,
GD = vgoal - hgoal,
win = hgoal < vgoal,
lose = hgoal > vgoal,
draw = hgoal == vgoal
) %>%
mutate(score = (Season >= 1995) * win * 3 + (Season <= 1994) *win* 2 + draw) %>%
mutate(score3 = win * 3 + draw) %>%
select(-home, -visitor, -hgoal, -vgoal, -HT, -FT)
laAll <- rbind(lav, lah)
laCA <- laAll %>%
group_by(team, Season, tier, round, group) %>%
summarize(
GF = sum(GF),
GA = sum(GA),
GD = sum(GD),
win = sum(win),
lose = sum(lose),
draw = sum(draw),
score = sum(score),
score3 = sum(score3),
games = n()
) %>% ungroup()۱. تعداد قهرمانی های تیم ها در تاریخ لالیگا را استخراج کرده و نمودار ستونی آنها را رسم کنید.
برای این سوال لازم است چند قانون را رعایت کنیم. اولین قانون این است که پیش از ۱۹۹۵ امتیازات به این صورت محاسبه می شدند که هر برد ۲ امتیاز داشت که این مورد در قسمت قبلی رعایت شده است. مورد دیگر این است که پس از ۱۹۳۵ برای تعیین قهرمان ابتدا امتیاز، سپس تفاضل گل بازی رو در رو و سپس تفاضل گل کل را مقایسه می کنند. پیش از آن ابتدا امتیاز و سپس تفاضل گل را مقایسه می کردند. حال ابتدا سال هایی را که در آن ها ۲ تیم امتیاز مساوی داشتند را به دست می آوریدم، سپس از میان آن ها آنی را که قهرمان می شود محاسبه می کنیم و در نهایت همه ی قهرمانی ها را بدست می آوریم.
laCA %>%
group_by(Season) %>%
top_n(n = 1, wt = score) %>%
ungroup() -> all_cands
all_cands %>%
group_by(Season) %>%
filter(n() < 2) %>% ungroup() -> non_dups
all_cands %>%
group_by(Season) %>%
filter(n() >= 2) %>% ungroup() -> dups
rbind(dups %>% filter(Season <= 1935) %>% top_n(n = 1, wt = GD), non_dups) -> non_dups
dups <- dups %>% filter(Season > 1935)
kable(dups %>%
arrange(-Season) %>% select(team, Season,score))| team | Season | score |
|---|---|---|
| FC Barcelona | 2006 | 76 |
| Real Madrid | 2006 | 76 |
| Deportivo La Coruna | 1993 | 56 |
| FC Barcelona | 1993 | 56 |
| Athletic Bilbao | 1983 | 49 |
| Real Madrid | 1983 | 49 |
| Real Madrid | 1980 | 45 |
| Real Sociedad | 1980 | 45 |
| FC Barcelona | 1970 | 43 |
| Valencia CF | 1970 | 43 |
| FC Barcelona | 1959 | 46 |
| Real Madrid | 1959 | 46 |
| Athletic Bilbao | 1946 | 34 |
| Valencia CF | 1946 | 34 |
dups_winners <- NULL
for (s in unique(dups$Season)) {
cands <- dups %>% filter(Season == s)
x <- as.character(cands[1, 'team'])
y <- as.character(cands[2, 'team'])
game1 <- la %>% filter(home == x, visitor == y, Season == s)
game2 <- la %>% filter(home == y, visitor == x, Season == s)
gd = game1$hgoal - game1$vgoal - game2$hgoal + game2$vgoal
if (gd > 0) {
dups_winners <- rbind(dups_winners, cands[1,])
} else if (gd < 0) {
dups_winners <- rbind(dups_winners, cands[2,])
} else if (gd == 0) {
if (as.numeric(cands[1, 'GD']) > as.numeric(cands[2, 'GD'])) {
dups_winners <- rbind(dups_winners, cands[1,])
} else{
dups_winners <- rbind(dups_winners, cands[2,])
}
}
}
champions <- rbind(dups_winners, non_dups)
champions %>% select(team, Season) -> champions
champions %>% group_by(team) %>% summarize(championships = n()) %>%
arrange(-championships) %>%
hchart(type = "bar",
hcaes(x = team, y = championships, color = team),
name = "No. of Championships") %>%
hc_title(text = "All Championships") %>%
hc_xAxis(title = list(text = "Teams")) %>%
hc_yAxis(title = list(text = "No. of Championsips")) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
champions %>% group_by(team) %>% summarize(championships = n()) %>%
arrange(-championships),
aes(
x = reorder(team, championships),
y = championships,
fill = team
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Teams") +
ylab("No. of Championsips") +
ggtitle("All Championships") + coord_flip()همانطور که در گروه درس گفته شده می توانستیم فرض کنیم در تمامی سال ها بردها ۳ امتیاز داشته اند و برای رتبه بندی برحسب امتیاز، تفاضل گل و گل زده عمل می کنیم در این صورت قهرمانی ها به این صورت خواهند بود: (در ادامه هم از این نوع امتیازدهی و رده بندی برای سایر سوال ها استفاده می کنیم)
champions <- laCA %>%
group_by(Season) %>%
top_n(n = 1, wt = score3) %>%
top_n(n = 1, wt = GD) %>%
top_n(n = 1, wt = GF) %>%
ungroup()
champions %>% select(team, Season) -> champions
champions %>% group_by(team) %>% summarize(championships = n()) %>%
arrange(-championships) %>%
hchart(type = "bar",
hcaes(x = team, y = championships, color = team),
name = "No. of Championships") %>%
hc_title(text = "All Championships") %>%
hc_xAxis(title = list(text = "Teams")) %>%
hc_yAxis(title = list(text = "No. of Championsips")) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
champions %>% group_by(team) %>% summarize(championships = n()) %>%
arrange(-championships),
aes(
x = reorder(team, championships),
y = championships,
fill = team
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Teams") +
ylab("No. of Championsips") +
ggtitle("All Championships") + coord_flip()۲. کسل کننده ترین لیگ و تیم را بیابید. نمودار ده تیم و ده فصل کسل کننده را رسم کنید.
کسل کننده ترین تیم را تیمی می گیریم که میانگین تعداد گل های زده ی ۲ طرف در بازی های آن تیم کمینه باشد. کسل کننده ترین لیگ را هم به طور مشابه.
laCA %>%
group_by(team) %>%
summarize(not_boringness = (sum(GF) + sum(GA)) / sum(games)) %>%
arrange(not_boringness) -> mostBoringTeams
kable(mostBoringTeams %>% head(1))| team | not_boringness |
|---|---|
| Real Burgos | 2.105263 |
laCA %>%
group_by(Season) %>%
summarize(not_boringness = (sum(GF) + sum(GA)) / sum(games)) %>%
arrange(not_boringness) -> mostBoringSeasons
kable(mostBoringSeasons %>% head(1))| Season | not_boringness |
|---|---|
| 1972 | 2.143791 |
ggplot(
mostBoringTeams %>% head(10),
aes(
x = reorder(team, -not_boringness),
y = not_boringness,
fill = not_boringness
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team") +
ylab("Avg. goals in each game") +
ggtitle("Top 10 Boring Teams") + coord_flip()mostBoringTeams %>% head(10) %>% arrange(not_boringness) %>%
mutate(not_boringness = round(not_boringness, digits = 2)) %>%
hchart(type = "bar",
hcaes(x = team, y = not_boringness, color = not_boringness),
name = "Avg. goals in each game") %>%
hc_title(text = "Top 10 Boring Teams") %>%
hc_xAxis(title = list(text = "Teams")) %>%
hc_yAxis(title = list(text = "Avg. goals in each game")) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
mostBoringSeasons %>% head(10),
aes(
x = reorder(Season, -not_boringness),
y = not_boringness,
fill = not_boringness
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Season") +
ylab("Avg. goals in each game") +
ggtitle("Top 10 Boring Seasons") + coord_flip()mostBoringSeasons %>% head(10) %>% arrange(not_boringness) %>%
mutate(not_boringness = round(not_boringness, digits = 2)) %>%
hchart(type = "bar",
hcaes(x = as.factor(Season), y = not_boringness, color = not_boringness),
name = "Avg. goals in each game") %>%
hc_title(text = "Top 10 Boring Seasons") %>%
hc_xAxis(title = list(text = "Season")) %>%
hc_yAxis(title = list(text = "Avg. goals in each game")) %>%
hc_add_theme(hc_theme_sandsignika())۳. در چند درصد موارد قهرمان نیم فصل در پایان فصل قهرمان شده است؟
ابتدا بازی های نیم فصل اول را جدا می کنیم و مشابه سوال ۱ قهرمان را از میان آن ها به دست می آوریم. سپس قهرمان های نیم فصل را به همراه قهرمان های کل فصل در یک جدول می آوریم و نتیجه را بررسی می کنیم.
laAll %>%
group_by(Season, team, opp) %>%
arrange(Date) %>%
top_n(n = 1, wt = desc(Date)) -> firstHalfSeasonsAll
laCAHalf <- firstHalfSeasonsAll %>%
group_by(team, Season, tier, round, group) %>%
summarize(
GF = sum(GF),
GA = sum(GA),
GD = sum(GD),
win = sum(win),
lose = sum(lose),
draw = sum(draw),
score = sum(score),
score3 = sum(score3),
games = n()
) %>% ungroup()
halfChampions <- laCAHalf %>%
group_by(Season) %>%
top_n(n = 1, wt = score3) %>%
top_n(n = 1, wt = GD) %>%
top_n(n = 1, wt = GF) %>%
ungroup()
halfChampions %>% select(halfChamp = team, Season) -> halfChampions
halfAndFinalChamp <-
full_join(x = halfChampions,
y = champions %>%
select(Champ = team, Season),
by = 'Season')%>%
mutate(isTheSame = (halfChamp == Champ))
ggplot(halfAndFinalChamp, aes(x = Season, y = ""))+
geom_point(aes(color = isTheSame), stat = "identity", size = 5) +
xlab("Season") +
ylab("The Same?")+
ggtitle("Half-Season Champ = Season Champ")halfAndFinalChamp %>%
group_by(isTheSame) %>%
summarize(count = n()) %>%
ungroup() -> shareChamp
ggpie <- function (dat, by, totals) {
ggplot(dat, aes_string(x = factor(1), y = totals, fill = by)) +
geom_bar(stat = 'identity', color = 'black') +
xlab("") +
ylab("") +
theme(
axis.line = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_blank()
) + # removes black borders from legend
coord_polar(theta = 'y') +
scale_y_continuous(breaks = cumsum(dat[[totals]]) - dat[[totals]] /
2, labels = dat[[by]])
}
ggpie(dat = shareChamp, by = "isTheSame", totals = "count") +
ggtitle("Half-Season Champ = Season Champ")halfAndFinalChamp %>%
hchart(type = "point", hcaes(
x = Season,
y = 1 * isTheSame,
color = isTheSame
)) %>%
hc_title(text = "Half-Season Champ = Season Champ") %>%
hc_xAxis(title = list(text = "Season")) %>%
hc_yAxis(title = list(text = "The Same?")) %>%
hc_add_theme(hc_theme_sandsignika())shareChamp %>%
hchart(type = "pie",
hcaes(
x = isTheSame,
y = count,
name = as.factor(isTheSame)
),
name = "count") %>%
hc_title(text = "Half-Season Champ = Season Champ") %>%
hc_add_theme(hc_theme_sandsignika())kable(sprintf("%0.2f%%", mean(halfAndFinalChamp$isTheSame) * 100))| x |
|---|
| 52.33% |
۴. در بین سال های ۲۰۰۱ تا ۲۰۱۰ گربه سیاه تیم های بزرگ چه تیم هایی بوده است؟
برای محاسبه ی گربه سیاه ها ابتدا باید بهترین تیم ها را پیدا کنیم که این کار را محاسبه ی جمع امتیازات تیم ها برداشتن ۶ تیم اول آن ها انجام می دهیم. سپس برای این تیم ها یک تیم از میان تیم های دیگر را که نسبت باخت این تیم ها به آن تیم بیشینه بوده پیدا می کنیم و آن را به عنوان گربه سیاه آن تیم معرفی می کنیم.
laCA_1_10 <- laCA %>% filter(Season <= 2010, Season >= 2001)
laAll_1_10 <- laAll %>% filter(Season <= 2010, Season >= 2001)
laCA %>%
group_by(team) %>%
summarize(scoresum = sum(score3)) %>%
top_n(n = 6, wt = scoresum) %>%
arrange(-scoresum) -> greatest_teams
greatest_teams_games <- laAll_1_10 %>%
filter(team %in% greatest_teams$team)
laCA_1_10 <- laCA %>% filter(Season <= 2010, Season >= 2001)
laAll_1_10 <- laAll %>% filter(Season <= 2010, Season >= 2001)
laAll %>%
group_by(team) %>%
summarize(scoresum = mean(score3)) %>%
top_n(n = 7, wt = scoresum) %>%
arrange(-scoresum) -> greatest_teams
greatest_teams_games <- laAll_1_10 %>%
filter(team %in% greatest_teams$team)
greatest_teams_games %>%
filter(!(opp %in% greatest_teams$team)) %>%
group_by(team, opp) %>%
summarize(loseRate = mean(lose), GD = sum(GD)) %>%
ungroup() %>%
group_by(team) %>%
top_n(n = 1, wt = loseRate) %>%
top_n(n = 1, wt = -GD) %>%
top_n(n = 1, wt= opp) %>%
ungroup() -> black_cats
kable(black_cats)| team | opp | loseRate | GD |
|---|---|---|---|
| Athletic Bilbao | Deportivo La Coruna | 0.5 | -7 |
| Atletico Madrid | Celta Vigo | 0.5 | -3 |
| FC Barcelona | Rayo Vallecano | 0.5 | 1 |
| Real Madrid | UD Las Palmas | 0.5 | 5 |
| Sevilla FC | UD Las Palmas | 0.5 | -1 |
| Valencia CF | Racing Santander | 0.5 | -1 |
| Villarreal CF | CD Alaves | 0.5 | -1 |
۵. در تاریخ لالیگا کدام تیم رکورددار زودترین قهرمانی است؟ همچنین کدام تیم مقتدرانه ترین قهرمانی را داشته است؟
laAll %>%
group_by(Season, team) %>%
arrange(Date) %>%
mutate(
current_score = cumsum(score3),
current_GD = cumsum(GD),
current_GF = cumsum(GF),
week = row_number(),
number_of_all_games = n()
) %>%
mutate(left_games = number_of_all_games - week) %>%
select(
Season,
week,
team,
current_score,
current_GD,
current_GF,
Date,
number_of_all_games,
left_games
) %>% ungroup()-> week_chart
week_chart %>%
group_by(Season, week) %>%
arrange(-current_score, -current_GD, -current_GF) %>%
mutate(current_rank = row_number()) %>% ungroup ()-> week_chart_rank
champions_week_left <- week_chart_rank %>%
group_by(Season, week) %>%
mutate(
bot_diff = current_score - lead(current_score, 1)
) %>%
filter(current_rank == 1, bot_diff > 3 * left_games) %>%
group_by(Season) %>%
top_n(n = 1, wt = left_games) %>%
select(Season, Cahmpion = team, Weeks_Early = left_games)%>%
ungroup() %>%
arrange(-Weeks_Early)
kable(champions_week_left %>%
top_n(n = 1, wt = Weeks_Early) %>% arrange(Season))| Season | Cahmpion | Weeks_Early |
|---|---|---|
| 1960 | Real Madrid | 5 |
| 1962 | Real Madrid | 5 |
| 1973 | FC Barcelona | 5 |
| 1974 | Real Madrid | 5 |
| 1990 | FC Barcelona | 5 |
معیارمان برای مقتدرانه بودن نسبت امتیاز تیم قهرمان به تیم دوم جدول در انتهای فصل است. حال ۱۰ قهرمانی مقتدرانه تاریخ را پیدا می کنیم.
greatest_championships <-week_chart_rank %>%
filter(left_games == 0, current_rank %in% c(1,2)) %>%
group_by(Season) %>%
arrange(current_rank) %>%
mutate(bot_diff_ratio = current_score / lead(current_score, 1)) %>%
ungroup() %>%
filter(current_rank ==1) %>%
select(Season, Champion = team, bot_diff_ratio) %>%
arrange(-bot_diff_ratio) %>% head(10) %>%
mutate(bot_diff_ratio = round(bot_diff_ratio, digits = 2))
greatest_championships %>%
hchart(type = "bar",
hcaes(x = paste(Champion, "(", Season, ")"), y = bot_diff_ratio, color = Champion,
name = Season), name = "Score ratio to runner up") %>%
hc_title(text = "Greatest Championships") %>%
hc_xAxis(title = list(text = "Team/Season")) %>%
hc_yAxis(title = list(text = "Score ratio to runner up"), min = 1) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
greatest_championships,
aes(
x = reorder(paste(Champion, "(", Season, ")"), bot_diff_ratio),
y = bot_diff_ratio,
fill = Champion
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team/Season") +
ylab("Score ratio to runner up") +
ggtitle("Greatest Championships") + coord_flip()۶. طولانی ترین نوار پیروزی مساوی و شکست مال چه تیم هایی است؟
# We determine consecutive results by score gained in the game
laAll %>%
group_by(team) %>%
arrange(Date)%>%
summarize(
cons_wins = max(rle(win)[["lengths"]][rle(win)[["values"]]]),
cons_draws = max(rle(draw)[["lengths"]][rle(draw)[["values"]]]),
cons_lose = max(rle(lose)[["lengths"]][rle(lose)[["values"]]])
) -> cons_all
cons_all %>% arrange(-cons_wins) %>% top_n(n = 5, cons_wins) %>%
select(-cons_draws, -cons_lose)-> most_wins
cons_all %>% arrange(-cons_draws) %>% top_n(n = 5, cons_draws) %>%
select(-cons_wins, -cons_lose)-> most_draws
cons_all %>% arrange(-cons_lose) %>% top_n(n = 5, cons_lose) %>%
select(-cons_draws, -cons_wins)-> most_lose
most_wins %>%
hchart(type = "bar",
hcaes(x = team, y = cons_wins),
name = "Longest Consecutive Wins") %>%
hc_title(text = "Longest Consecutive Wins") %>%
hc_xAxis(title = list(text = "Team")) %>%
hc_yAxis(title = list(text = "Longest Consecutive Win"), min = 5) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
most_wins,
aes(
x = reorder(team, cons_wins),
y = cons_wins,
fill = ""
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team") +
ylab("Longest Consecutive Win") +
ggtitle("Longest Consecutive Wins") + coord_flip()most_draws %>%
hchart(type = "bar",
hcaes(x = team, y = cons_draws),
name = "Longest Consecutive Draws") %>%
hc_title(text = "Longest Consecutive Draws") %>%
hc_xAxis(title = list(text = "Team")) %>%
hc_yAxis(title = list(text = "Longest Consecutive Draws"), min = 5) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
most_draws,
aes(
x = reorder(team, cons_draws),
y = cons_draws,
fill =""
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team") +
ylab("Longest Consecutive Draws") +
ggtitle("Longest Consecutive Draws") + coord_flip()most_lose %>%
hchart(type = "bar",
hcaes(x = team, y = cons_lose),
name = "Longest Consecutive Losses") %>%
hc_title(text = "Longest Consecutive Losses") %>%
hc_xAxis(title = list(text = "Team")) %>%
hc_yAxis(title = list(text = "Longest Consecutive Losses"), min =5) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
most_lose,
aes(
x = reorder(team, cons_lose),
y = cons_lose,
fill =""
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team") +
ylab("Longest Consecutive Losses") +
ggtitle("Longest Consecutive Losses") + coord_flip()۷. زودترین سقوط مال کدام تیم بوده است؟
از داده ی محاسبه شده برای هر هفته استفاده می کنیم و زودترین هفته ای را به دست می آوریم که در آن اختلاف تیم های آخر و ۴م از آخر بیشتر از ۳ برابر تمام بازی های باقیمانده بشود.
#We assume that 3 teams relegate
week_chart %>%
group_by(Season, week) %>%
arrange(-current_score, -current_GD, -current_GF) %>%
mutate(current_rank = row_number()) %>%
mutate(top_3_diff = lag(current_score, 3) - current_score) %>%
filter(current_rank == (number_of_all_games / 2) + 1,
top_3_diff > 3 * left_games) %>%
group_by(Season) %>%
top_n(n = 1, wt = left_games) %>%
select(Season,Team = team, Weeks_Early = left_games) %>%
ungroup() %>%
arrange(-Weeks_Early) -> relegations_week_left
earliest_relegations <-relegations_week_left %>%
top_n(n = 5, wt = Weeks_Early) %>% arrange(-Weeks_Early,Season)
earliest_relegations%>%
hchart(type = "bar",
hcaes(x = paste(Team, "(",Season, ")") , y = Weeks_Early),
name = "Weeks Before Season End") %>%
hc_title(text = "Earliest Relegations") %>%
hc_xAxis(title = list(text = "Team")) %>%
hc_yAxis(title = list(text = "Weeks Before Season End"), min =2) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(
earliest_relegations,
aes(
x = reorder(paste(Team, "(",Season, ")"), Weeks_Early),
y = Weeks_Early,
fill =""
)
) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team") +
ylab("Weeks Before Season End") +
ggtitle("Earliest Relegations") + coord_flip()۸. مانند شکل بالا تصویری از روند تغییر رتبه تیم ها در طول فصل ۱۹۹۸ رسم نمایید.
week_chart_rank %>%
filter(Season == 1998) %>%
select(week, team, current_rank, Date) %>%
group_by(week) %>% mutate(week_end_date = min(Date)) %>%
ungroup() %>% arrange(week) -> chart_98
chart_98 %>%
hchart("line", hcaes(x = week_end_date, y = current_rank, group = team)) %>%
hc_title(text = "1998 League") %>%
hc_xAxis(
title = list(text = "Week"),
tickInterval = 1,
type = 'datetime',
labels = list(rotation = -45, format = '{value:%b %e}')
) %>%
hc_yAxis(
title = list(text = "Rank"),
reversed = T,
tickInterval = 1,
max = 20,
min = 1
) %>%
hc_tooltip(crosshairs = T) %>%
hc_add_theme(hc_theme_sandsignika())chart_98 %>%
ggplot(aes(
x = week_end_date,
y = current_rank,
group = team,
color = team
)) +
geom_line() +
scale_y_reverse(breaks = 1:20)+
scale_x_date(breaks = seq.Date(
min(chart_98$week_end_date),
max(chart_98$week_end_date),
"week"
),
labels = date_format("%b %d")) +
theme(axis.text.x = element_text(
angle = 45,
size = 10,
hjust = 1,
vjust = 1,
family = "Helvetica"
)) +
xlab("Week") +
ylab("Rank") +
ggtitle("1998 League")۹. جدولی مشابه بالا برای فصل ۲۰۱۲ از کل نتایج طراحی کنید.
با استفاده از kable
la_2012 <- la %>% filter(Season == 2012) %>% select(home, visitor, FT)
la_2012 %>% tidyr::spread(visitor, FT) -> chart_12
kable(chart_12, format = "html") %>%
kable_styling("striped", full_width = F) %>%
scroll_box(height = "10in")| home | Athletic Bilbao | Atletico Madrid | CA Osasuna | Celta Vigo | Deportivo La Coruna | Espanyol Barcelona | FC Barcelona | Getafe CF | Granada CF | Levante UD | Malaga CF | Rayo Vallecano | RCD Mallorca | Real Betis | Real Madrid | Real Sociedad | Real Valladolid | Real Zaragoza | Sevilla FC | Valencia CF |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Athletic Bilbao | NA | 3-0 | 1-0 | 1-0 | 1-1 | 0-4 | 2-2 | 1-2 | 1-0 | 0-1 | 0-0 | 1-2 | 2-1 | 3-5 | 0-3 | 1-3 | 2-0 | 0-2 | 2-1 | 1-0 |
| Atletico Madrid | 4-0 | NA | 3-1 | 1-0 | 6-0 | 1-0 | 1-2 | 2-0 | 5-0 | 2-0 | 2-1 | 4-3 | 0-0 | 1-0 | 1-2 | 0-1 | 2-1 | 2-0 | 4-0 | 1-1 |
| CA Osasuna | 0-1 | 0-2 | NA | 1-0 | 2-1 | 0-2 | 1-2 | 1-0 | 1-2 | 4-0 | 0-0 | 1-0 | 1-1 | 0-0 | 0-0 | 0-0 | 0-1 | 1-0 | 2-1 | 0-1 |
| Celta Vigo | 1-1 | 1-3 | 2-0 | NA | 1-1 | 1-0 | 2-2 | 2-1 | 2-1 | 1-1 | 0-1 | 0-2 | 1-1 | 0-1 | 1-2 | 1-1 | 3-1 | 2-1 | 2-0 | 0-1 |
| Deportivo La Coruna | 1-1 | 0-0 | 2-0 | 3-1 | NA | 2-0 | 4-5 | 1-1 | 0-3 | 0-2 | 1-0 | 0-0 | 1-0 | 2-3 | 1-2 | 0-1 | 0-0 | 3-2 | 0-2 | 2-3 |
| Espanyol Barcelona | 3-3 | 0-1 | 0-3 | 1-0 | 2-0 | NA | 0-2 | 0-2 | 0-1 | 3-2 | 0-0 | 3-2 | 3-2 | 1-0 | 1-1 | 2-2 | 0-0 | 1-2 | 2-2 | 3-3 |
| FC Barcelona | 5-1 | 4-1 | 5-1 | 3-1 | 2-0 | 4-0 | NA | 6-1 | 2-0 | 1-0 | 4-1 | 3-1 | 5-0 | 4-2 | 2-2 | 5-1 | 2-1 | 3-1 | 2-1 | 1-0 |
| Getafe CF | 1-0 | 0-0 | 1-1 | 3-1 | 3-1 | 0-2 | 1-4 | NA | 2-2 | 0-1 | 1-0 | 1-2 | 1-0 | 2-4 | 2-1 | 2-1 | 2-1 | 2-0 | 1-1 | 0-1 |
| Granada CF | 1-2 | 0-1 | 3-0 | 2-1 | 1-1 | 0-0 | 1-2 | 2-0 | NA | 1-1 | 1-0 | 2-0 | 1-2 | 1-5 | 1-0 | 0-0 | 1-1 | 1-2 | 1-1 | 1-2 |
| Levante UD | 3-1 | 1-1 | 0-2 | 0-1 | 0-4 | 3-2 | 0-4 | 0-0 | 3-1 | NA | 1-2 | 2-3 | 4-0 | 1-1 | 1-2 | 2-1 | 2-1 | 0-0 | 1-0 | 1-0 |
| Malaga CF | 1-0 | 0-0 | 1-0 | 1-1 | 3-1 | 0-2 | 1-3 | 2-1 | 4-0 | 3-1 | NA | 1-2 | 1-1 | 4-0 | 3-2 | 1-2 | 2-1 | 1-1 | 0-0 | 4-0 |
| Rayo Vallecano | 2-2 | 2-1 | 2-2 | 3-2 | 2-1 | 2-0 | 0-5 | 3-1 | 1-0 | 3-0 | 1-3 | NA | 2-0 | 3-0 | 0-2 | 0-2 | 1-2 | 0-2 | 0-0 | 0-4 |
| RCD Mallorca | 0-1 | 1-1 | 1-1 | 1-0 | 2-3 | 2-1 | 2-4 | 1-3 | 1-2 | 1-1 | 2-3 | 1-1 | NA | 1-0 | 0-5 | 1-0 | 4-2 | 1-1 | 2-1 | 2-0 |
| Real Betis | 1-1 | 2-4 | 2-1 | 1-0 | 1-1 | 1-0 | 1-2 | 0-0 | 1-2 | 2-0 | 3-0 | 1-2 | 1-2 | NA | 1-0 | 2-0 | 0-0 | 4-0 | 3-3 | 1-0 |
| Real Madrid | 5-1 | 2-0 | 4-2 | 2-0 | 5-1 | 2-2 | 2-1 | 4-0 | 3-0 | 5-1 | 6-2 | 2-0 | 5-2 | 3-1 | NA | 4-3 | 4-3 | 4-0 | 4-1 | 1-1 |
| Real Sociedad | 2-0 | 0-1 | 0-0 | 2-1 | 1-1 | 0-1 | 3-2 | 1-1 | 2-2 | 1-1 | 4-2 | 4-0 | 3-0 | 3-3 | 3-3 | NA | 4-1 | 2-0 | 2-1 | 4-2 |
| Real Valladolid | 2-2 | 0-3 | 1-3 | 0-2 | 1-0 | 1-1 | 1-3 | 2-1 | 1-0 | 2-0 | 1-1 | 6-1 | 3-1 | 0-1 | 2-3 | 2-2 | NA | 2-0 | 1-1 | 1-1 |
| Real Zaragoza | 1-2 | 1-3 | 3-1 | 0-1 | 5-3 | 0-0 | 0-3 | 0-1 | 0-0 | 0-1 | 0-1 | 3-0 | 3-2 | 1-2 | 1-1 | 1-2 | 0-1 | NA | 2-1 | 2-2 |
| Sevilla FC | 2-1 | 0-1 | 1-0 | 4-1 | 3-1 | 3-0 | 2-3 | 2-1 | 3-0 | 0-0 | 0-2 | 2-1 | 3-2 | 5-1 | 1-0 | 1-2 | 1-2 | 4-0 | NA | 4-3 |
| Valencia CF | 3-2 | 2-0 | 4-0 | 2-1 | 3-3 | 2-1 | 1-1 | 4-2 | 1-0 | 2-2 | 5-1 | 0-1 | 2-0 | 3-0 | 0-5 | 2-5 | 2-1 | 2-0 | 2-0 | NA |
با استفاده از geom_tile
la_2012_other <-
la %>% filter(Season == 2012) %>% select(home, visitor, FT, hgoal)
teams_12 <- unique(la_2012 %>% select(home))
la_2012_self <- teams_12 %>%
mutate(visitor = home,
FT = "",
hgoal = 0)
la_2012 <- rbind(la_2012_other, la_2012_self)
scale_fill_Matrix <- function(...) {
library(scales)
discrete_scale("fill", "Publication", manual_pal(values = c("white",
"lightBlue")), ...)
}
ggplot(la_2012, aes(reorder(home, -desc(home)), reorder(visitor, desc(visitor)))) +
# x and y axes => Var1 and Var2
geom_tile(aes(fill = as.factor((
as.numeric(as.factor(home)) +
as.numeric(as.factor(visitor))
) %% 2)), color = "black") + # background colours are mapped according to the value column
geom_text(aes(label = FT)) + # write the values
theme(
panel.grid.major.x = element_blank(),
#no gridlines
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.background = element_rect(fill = "white"),
# background=white
axis.text.x = element_text(
angle = 40,
hjust = 0,
vjust = -2,
size = 12,
face = "bold"
),
plot.title = element_text(size = 20, face = "bold"),
axis.text.y = element_text(size = 12, face = "bold")
) +
ggtitle("1998 League") +
theme(legend.title = element_text(face = "bold", size = 14)) +
scale_x_discrete(name = "", position = "top") +
scale_y_discrete(name = "") +
labs(fill = "") +
geom_abline(slope = -1, intercept = 21) +
guides(fill = F) + scale_fill_Matrix()۱۰. سه آماره به همراه نمودار فردوسی پسند استخراج کنید.
در این نمودار نمودار میانگین تعداد گل های هفته های مختلف لیگ را از ۱۹۹۷ به بعد می کشیم. به طور شهودی احتمالن باید این نمودار نمودار یونیفورمی باشد. ولی چیزی که مشاهده می کنیم این است که تعداد گل ها به طور کلی روندی صعودی دارد و یونیفورم هم نیست.
laAll %>%
group_by(Season, team) %>%
arrange(Date) %>%
mutate(week = row_number()) %>%
ungroup() %>%
group_by(Season , week, team) %>%
mutate(goals = sum(GA) + sum(GF)) %>%
ungroup() %>%
filter(Season > 1996) %>%
group_by(week) %>%
summarize(avg_goals = round(mean(goals), 3)) %>%
arrange(week) %>%
mutate(type = "original")-> weeks_avg_goals
coeff = coef(lm(avg_goals ~ week, data = weeks_avg_goals))
regression_line = round(coeff[2]*weeks_avg_goals$week + coeff[1],3)
regress <- data.frame(avg_goals = regression_line, week = weeks_avg_goals$week, type = "lm")
rbind(weeks_avg_goals, regress) %>%
hchart(type = "line", hcaes(x = week, y = avg_goals, group = type, name = type),
name = "Avg. Goals") %>%
hc_title(text = "1997 - 2016 Weeks Avg. Goals") %>%
hc_xAxis(title = list(text = "Week")) %>%
hc_yAxis(title = list(text = "Avg. Goals")) %>%
hc_tooltip(crosshairs = T, shared = T) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(weeks_avg_goals, aes(x = week, y = avg_goals, color = "")) +
geom_line() +
geom_smooth(method = "lm") +
ylab("Avg. Goals") +
xlab("Week") +
ggtitle("1997 - 2016 Weeks Avg. Goals") +
guides(color = F)در این نمودار عملکرد کلی تیم ها را در بازی خانکی و در بازی های بیرون از خانه مقایسه می کنیم. و این مسئله را بررسی می کنیم که چه تاثیری دارد و اینکه آیا مهمان یا میزبان بودن با فرض بردن بازی تغییری در توزیع نتایج ایجاد می کند یا نه.
la %>%
filter(hgoal != vgoal, hgoal <= 5, vgoal <= 5) %>%
mutate(HA = paste(hgoal, "-", vgoal),
AH = paste(vgoal, "-", hgoal)) %>% select(HA, AH, hgoal, vgoal) -> HA_AH
HA_AH %>%
filter(hgoal > vgoal) %>%
select(AH) %>%
group_by(AH) %>%
summarise(count = n(), type = "A-H") %>%
mutate(density = round(count / sum(count), 3)) %>%
select(result = AH, count, density, type) -> AH
HA_AH %>%
filter(hgoal < vgoal) %>%
select(HA) %>%
group_by(HA) %>%
summarise(count = n(), type = "H-A") %>%
mutate(density = round(count / sum(count), 3)) %>%
select(result = HA, count, density, type) -> HA
rbind(HA, AH) -> AH_HA_count
AH_HA_count %>%
hchart(type = "line", hcaes(x = result, y = count, group = type)) %>%
hc_title(text = "Home-Away & Away-Home Results Comparison (Count)") %>%
hc_xAxis(title = list(text = "Count")) %>%
hc_xAxis(title = list(text = "Result")) %>%
hc_tooltip(crosshairs = T, shared = T) %>%
hc_add_theme(hc_theme_sandsignika())AH_HA_count %>%
hchart(type = "line", hcaes(x = result, y = density, group = type)) %>%
hc_title(text = "Home-Away & Away-Home Results Comparison (Density)") %>%
hc_xAxis(title = list(text = "Density")) %>%
hc_xAxis(title = list(text = "Result")) %>%
hc_tooltip(crosshairs = T, shared = T) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(AH_HA_count,
aes(
x = result,
y = count,
group = type,
fill = type
)) +
geom_area( position = "dodge", alpha = 0.5)+
xlab("Result")+
ylab("Count")+
ggtitle("Home-Away & Away-Home Results Comparison (Count)")ggplot(AH_HA_count,
aes(
x = result,
y = density,
group = type,
fill = type
)) +
geom_area(position = "dodge", alpha = 0.5)+
xlab("Result")+
ylab("Density")+
ggtitle("Home-Away & Away-Home Results Comparison (Density)")همانطور که مشاهده می کنید در اکثر اوقات و اکثر نتایج برنده تیم میزبان است ولی اگر چگالی را برای حالت برنده بودن را مقایسه کنیم تفاوت مشهودی وجود ندارد. یعنی به طور کلی میزبان بودن شانس برد را بیشتر می کند ولی روی توزیع شرطی تاثیر مشهودی ندارد. الگوی دیگری که مشاهده می کنیم این است که با زیاد شدن تفاضل گل در بازی برده شده احتمال اینکه در خانه باشیم با شیب نسبتن خطی ای افزایش می یابد.
در این مسئله مقتدرانه ترین کامبک های تاریخ لالیگا را بررسی می کنیم. به این معنا که بازی هایی را به دست می آوریم که در آن ها یک تیم در نیمه ی اول بازی بازنده بوده است ولی سپس در نیمه ی دوم با بیشترین تفاضل گل ممکن بازی را می برد.
la %>%
separate(HT, sep = "-", into = c("hgoal_h", "vgoal_h")) -> la_h
la_h$hgoal_h <- as.numeric(la_h$hgoal_h)
la_h$vgoal_h <- as.numeric(la_h$vgoal_h)
lah_h <- la_h %>%
mutate(
team = home,
opp = visitor,
GF = hgoal,
GA = vgoal,
GD = hgoal - vgoal,
GF_h = hgoal_h,
GA_h = vgoal_h,
GD_h = hgoal_h - vgoal_h,
win = hgoal > vgoal,
lose = hgoal < vgoal,
draw = hgoal == vgoal
) %>%
mutate(score = (Season >= 1995) * win * 3 + (Season <= 1994) * win * 2 + draw) %>%
mutate(score3 = win * 3 + draw) %>%
select(-home, -visitor, -hgoal, -vgoal, -FT, -hgoal_h, -vgoal_h)
lav_h <- la_h %>%
mutate(
team = visitor,
opp = home,
GF = vgoal,
GA = hgoal,
GD = vgoal - hgoal,
GF_h = vgoal_h,
GA_h = hgoal_h,
GD_h = vgoal_h - hgoal_h,
win = hgoal < vgoal,
lose = hgoal > vgoal,
draw = hgoal == vgoal
) %>%
mutate(score = (Season >= 1995) * win * 3 + (Season <= 1994) * win * 2 + draw) %>%
mutate(score3 = win * 3 + draw) %>%
select(-home, -visitor, -hgoal, -vgoal, -FT, -hgoal_h, -vgoal_h)
laAll_h <- rbind(lav_h, lah_h) %>%
mutate(comeback = GD - GD_h)
laAll_h %>%
filter(GD_h < 0, GD > 0) %>%
arrange(-comeback) %>%
select(-score,
-score3,
-win,
-draw,
-lose,
-round,
-notes,
-tier,
-group,
-Date) %>%
head(11) -> greatest_comebacks
greatest_comebacks %>%
hchart(type = "bar",
hcaes(
x = paste(team, "-", opp, "(", Season, ",", GF_h, "-", GA_h, ",", GF, "-", GA, ")") ,
y = comeback,
color = team
),
name = "GD in second half") %>%
hc_title(text = "Greatest Comebacks Ever") %>%
hc_xAxis(title = list(text = "Team")) %>%
hc_yAxis(title = list(text = "GD in second half"),
min = 2) %>%
hc_add_theme(hc_theme_sandsignika())ggplot(greatest_comebacks,
aes(
x = reorder(paste(team, "-", opp, "(", Season, ",", GF_h, "-", GA_h, ",", GF, "-", GA, ")"), comeback),
y = comeback,
fill = team
)) +
geom_bar(stat = "identity") +
guides(fill = F) +
xlab("Team") +
ylab("GD in second half") +
ggtitle("Greatest Comebacks Ever") + coord_flip()